In [1]:
# Show INFO-level log records in cell output (the agent reports per-trial metrics through logging)
import logging
logging.basicConfig(level=logging.INFO)

# Prepend local source checkouts to the import search path.
# The paths are relative, so they resolve against the notebook's current working directory.
# NOTE(review): '../../..' is presumably the PyALCS repository root — confirm.
import sys, os
sys.path.insert(0, os.path.abspath('../../..'))
sys.path.insert(0, os.path.abspath('../../../../openai-envs'))

# Enable automatic module reload
%load_ext autoreload
%autoreload 2

# Load PyALCS module
from lcs import Perception
from lcs.agents.acs2 import ACS2, Configuration, ClassifiersList

# Load environments
import gym
import gym_grid

Grid

Actions:

MOVE_LEFT = 0
MOVE_RIGHT = 1
MOVE_UP = 2
MOVE_DOWN = 3

In [2]:
# ACS2 settings shared by every agent created in this notebook.
cfg = Configuration(
    # Problem shape: two-element perceptions and the four MOVE_* actions (0-3).
    classifier_length=2,
    number_of_possible_actions=4,
    # Learning / action-selection parameters (see the PyALCS Configuration docs for semantics).
    epsilon=1.0,
    beta=0.2,
    gamma=0.95,
    theta_exp=50,
    # Genetic-algorithm related settings (going by the `*_ga` names — confirm in the docs).
    theta_ga=50,
    do_ga=True,
    mu=0.4,
    u_max=2,
    # Metrics collection interval, in trials.
    metrics_trial_frequency=10,
)

In [3]:
def print_cl(cl):
    """Print a one-line summary of a classifier.

    The line shows the condition, an arrow for the action, the effect and the
    fitness / ``r`` / ``ir`` values. Action codes 0, 1, 2, 3 are rendered as
    ⬅, ➡, ⬆, ⬇ respectively; any other action is printed as ``None``.
    """
    # The position of each arrow in the tuple is the action code it stands for.
    arrows = ('⬅', '➡', '⬆', '⬇')

    action = None
    for code, arrow in enumerate(arrows):
        if cl.action == code:
            action = arrow

    print(f"{cl.condition} - {action} - {cl.effect} [fit: {cl.fitness:.3f}, r: {cl.r:.2f}, ir: {cl.ir:.2f}]")

In [4]:
def print_simple_stats(population, metrics):
    """Print the population size plus step statistics taken from ``metrics``.

    Args:
        population: sized collection of classifiers; only ``len()`` is used.
        metrics: iterable of dicts, each providing a ``'steps_in_trial'`` value.

    The average and the total cover only the entries present in ``metrics``.
    When ``metrics`` is empty the average is reported as ``0.0`` instead of
    raising ``ZeroDivisionError``.
    """
    steps = [m['steps_in_trial'] for m in metrics]
    total_steps = sum(steps)
    # Guard the division: an empty metrics collection has no meaningful average.
    avg_steps = total_steps / len(steps) if steps else 0.0

    print(f"Population of {len(population)}, avg steps {avg_steps}, all steps {total_steps}")

Grid

Exploration


In [5]:
# Instantiate the environment by its id; `grid` is passed to both explore() and exploit() below.
# NOTE(review): the 'grid-20-v0' id is presumably registered by the `gym_grid` import — confirm.
grid = gym.make('grid-20-v0')

In [6]:
%%time
# Start from a fresh agent (no initial population) and run 5,000 exploration trials.
# The recorded run of this cell took about 31 minutes of wall time.
# `population` seeds the exploitation agent later; `metrics` feeds print_simple_stats.
agent = ACS2(cfg)
population, metrics = agent.explore(grid, 5_000)


INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 11, 'reward': 1000}
INFO:lcs.agents.Agent:{'trial': 500, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 1000, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 1500, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 2000, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 2500, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 3000, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 3500, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 4000, 'steps_in_trial': 1000, 'reward': 0}
INFO:lcs.agents.Agent:{'trial': 4500, 'steps_in_trial': 1000, 'reward': 0}
CPU times: user 28min 41s, sys: 12.6 s, total: 28min 54s
Wall time: 31min 3s

In [7]:
# Summarise the exploration run: population size and step counts from the collected metrics.
print_simple_stats(population, metrics)


Population of 80, avg steps 986.084, all steps 493042

In [8]:
# List every classifier learned during exploration, highest fitness first.
for cl in sorted(population, key=lambda c: c.fitness, reverse=True):
    print_cl(cl)


19# - ➡ - 20# [fit: 226.660, r: 226.66, ir: 160.00]
18# - ➡ - 19# [fit: 155.493, r: 155.49, ir: 0.00]
20# - ⬅ - 19# [fit: 106.599, r: 106.60, ir: 0.00]
19# - ⬅ - 18# [fit: 103.800, r: 103.80, ir: 0.00]
17# - ➡ - 18# [fit: 82.784, r: 82.78, ir: 0.00]
20# - ➡ - ## [fit: 80.743, r: 80.74, ir: 0.00]
18# - ⬅ - 17# [fit: 75.871, r: 75.87, ir: 0.00]
16# - ➡ - 17# [fit: 73.971, r: 73.97, ir: 0.00]
15# - ➡ - 16# [fit: 72.763, r: 72.76, ir: 0.00]
17# - ⬅ - 16# [fit: 72.058, r: 72.06, ir: 0.00]
16# - ⬅ - 15# [fit: 71.064, r: 71.06, ir: 0.00]
14# - ➡ - 15# [fit: 70.923, r: 70.92, ir: 0.00]
13# - ➡ - 14# [fit: 68.927, r: 68.93, ir: 0.00]
15# - ⬅ - 14# [fit: 68.457, r: 68.46, ir: 0.00]
12# - ➡ - 13# [fit: 66.596, r: 66.60, ir: 0.00]
14# - ⬅ - 13# [fit: 66.504, r: 66.50, ir: 0.00]
11# - ➡ - 12# [fit: 64.269, r: 64.27, ir: 0.00]
13# - ⬅ - 12# [fit: 64.027, r: 64.03, ir: 0.00]
10# - ➡ - 11# [fit: 62.474, r: 62.47, ir: 0.00]
12# - ⬅ - 11# [fit: 61.857, r: 61.86, ir: 0.00]
11# - ⬅ - 10# [fit: 60.134, r: 60.13, ir: 0.00]
9# - ➡ - 10# [fit: 60.063, r: 60.06, ir: 0.00]
8# - ➡ - 9# [fit: 57.547, r: 57.55, ir: 0.00]
10# - ⬅ - 9# [fit: 57.413, r: 57.41, ir: 0.00]
#12 - ⬇ - #11 [fit: 55.167, r: 55.17, ir: 0.00]
9# - ⬅ - 8# [fit: 54.934, r: 54.93, ir: 0.00]
7# - ➡ - 8# [fit: 54.875, r: 54.88, ir: 0.00]
#13 - ⬇ - #12 [fit: 53.551, r: 53.55, ir: 0.00]
#11 - ⬆ - #12 [fit: 53.273, r: 53.27, ir: 0.00]
8# - ⬅ - 7# [fit: 52.271, r: 52.27, ir: 0.00]
6# - ➡ - 7# [fit: 52.239, r: 52.24, ir: 0.00]
#10 - ⬆ - #11 [fit: 52.160, r: 52.16, ir: 0.00]
#9 - ⬆ - #10 [fit: 52.081, r: 52.08, ir: 0.00]
#11 - ⬇ - #10 [fit: 52.062, r: 52.06, ir: 0.00]
#10 - ⬇ - #9 [fit: 51.901, r: 51.90, ir: 0.00]
#8 - ⬆ - #9 [fit: 51.229, r: 51.23, ir: 0.00]
#14 - ⬇ - #13 [fit: 50.759, r: 50.76, ir: 0.00]
#12 - ⬆ - #13 [fit: 50.724, r: 50.72, ir: 0.00]
#7 - ⬆ - #8 [fit: 50.339, r: 50.34, ir: 0.00]
#9 - ⬇ - #8 [fit: 50.022, r: 50.02, ir: 0.00]
5# - ➡ - 6# [fit: 49.893, r: 49.89, ir: 0.00]
7# - ⬅ - 6# [fit: 49.758, r: 49.76, ir: 0.00]
1# - ⬅ - ## [fit: 49.335, r: 49.34, ir: 0.00]
#14 - ⬆ - #15 [fit: 48.909, r: 48.91, ir: 0.00]
#15 - ⬆ - #16 [fit: 48.661, r: 48.66, ir: 0.00]
#15 - ⬇ - #14 [fit: 48.441, r: 48.44, ir: 0.00]
2# - ➡ - 3# [fit: 48.344, r: 48.34, ir: 0.00]
#8 - ⬇ - #7 [fit: 48.287, r: 48.29, ir: 0.00]
4# - ➡ - 5# [fit: 48.231, r: 48.23, ir: 0.00]
#6 - ⬆ - #7 [fit: 48.215, r: 48.21, ir: 0.00]
6# - ⬅ - 5# [fit: 48.114, r: 48.11, ir: 0.00]
1# - ➡ - 2# [fit: 48.107, r: 48.11, ir: 0.00]
3# - ➡ - 4# [fit: 48.056, r: 48.06, ir: 0.00]
#16 - ⬇ - #15 [fit: 47.894, r: 47.89, ir: 0.00]
5# - ⬅ - 4# [fit: 47.667, r: 47.67, ir: 0.00]
4# - ⬅ - 3# [fit: 47.556, r: 47.56, ir: 0.00]
#19 - ⬆ - #20 [fit: 47.174, r: 47.17, ir: 0.00]
2# - ⬅ - 1# [fit: 47.123, r: 47.12, ir: 0.00]
#17 - ⬇ - #16 [fit: 47.040, r: 47.04, ir: 0.00]
#16 - ⬆ - #17 [fit: 46.905, r: 46.91, ir: 0.00]
#17 - ⬆ - #18 [fit: 46.851, r: 46.85, ir: 0.00]
#20 - ⬇ - #19 [fit: 46.766, r: 46.77, ir: 0.00]
#18 - ⬆ - #19 [fit: 46.759, r: 46.76, ir: 0.00]
#20 - ⬆ - ## [fit: 46.658, r: 46.66, ir: 0.00]
3# - ⬅ - 2# [fit: 46.646, r: 46.65, ir: 0.00]
#18 - ⬇ - #17 [fit: 46.544, r: 46.54, ir: 0.00]
#7 - ⬇ - #6 [fit: 46.321, r: 46.32, ir: 0.00]
#19 - ⬇ - #18 [fit: 46.090, r: 46.09, ir: 0.00]
#5 - ⬆ - #6 [fit: 45.886, r: 45.89, ir: 0.00]
#5 - ⬇ - #4 [fit: 45.651, r: 45.65, ir: 0.00]
#6 - ⬇ - #5 [fit: 45.503, r: 45.50, ir: 0.00]
#4 - ⬇ - #3 [fit: 45.333, r: 45.33, ir: 0.00]
#1 - ⬆ - #2 [fit: 45.140, r: 45.14, ir: 0.00]
#2 - ⬇ - #1 [fit: 45.038, r: 45.04, ir: 0.00]
#3 - ⬇ - #2 [fit: 44.996, r: 45.00, ir: 0.00]
#4 - ⬆ - #5 [fit: 44.968, r: 44.97, ir: 0.00]
#3 - ⬆ - #4 [fit: 44.908, r: 44.91, ir: 0.00]
#2 - ⬆ - #3 [fit: 44.757, r: 44.76, ir: 0.00]
#1 - ⬇ - ## [fit: 44.564, r: 44.56, ir: 0.00]
#13 - ⬆ - #14 [fit: 39.040, r: 39.04, ir: 0.00]

Exploitation


In [13]:
%%time
# Rebind `agent` to a new ACS2 instance seeded with the exploration population,
# then run 50 exploitation trials on the same environment.
agent = ACS2(cfg, population)
pop_exploit, metric_exploit = agent.exploit(grid, 50)


INFO:lcs.agents.Agent:{'trial': 0, 'steps_in_trial': 1000, 'reward': 0}
CPU times: user 7.41 s, sys: 14.1 ms, total: 7.42 s
Wall time: 7.47 s

In [14]:
# Summarise the exploitation run using the population and metrics returned by exploit().
print_simple_stats(pop_exploit, metric_exploit)


Population of 80, avg steps 930.2, all steps 4651

In [15]:
# List the classifiers after exploitation, highest fitness first.
# Iterate `pop_exploit` (the population returned by exploit()) so this listing
# describes the same object as the exploitation stats, rather than the
# exploration-phase `population` variable.
# NOTE(review): if exploit() returns the very same population object this prints
# identical content — confirm against the PyALCS Agent implementation.
for cl in sorted(pop_exploit, key=lambda c: c.fitness, reverse=True):
    print_cl(cl)


20# - ➡ - ## [fit: 80.743, r: 80.74, ir: 0.00]
1# - ⬅ - ## [fit: 49.335, r: 49.34, ir: 0.00]
#20 - ⬆ - ## [fit: 46.658, r: 46.66, ir: 0.00]
#1 - ⬇ - ## [fit: 44.564, r: 44.56, ir: 0.00]
#2 - ⬇ - #1 [fit: 0.710, r: 0.71, ir: 0.00]
#1 - ⬆ - #2 [fit: 0.709, r: 0.71, ir: 0.00]
#2 - ⬆ - #3 [fit: 0.705, r: 0.70, ir: 0.00]
#3 - ⬇ - #2 [fit: 0.568, r: 0.57, ir: 0.00]
#14 - ⬆ - #15 [fit: 0.566, r: 0.57, ir: 0.00]
#16 - ⬆ - #17 [fit: 0.566, r: 0.57, ir: 0.00]
#15 - ⬆ - #16 [fit: 0.566, r: 0.57, ir: 0.00]
#6 - ⬇ - #5 [fit: 0.566, r: 0.57, ir: 0.00]
#13 - ⬆ - #14 [fit: 0.566, r: 0.57, ir: 0.00]
#17 - ⬆ - #18 [fit: 0.565, r: 0.57, ir: 0.00]
#5 - ⬇ - #4 [fit: 0.565, r: 0.57, ir: 0.00]
#18 - ⬆ - #19 [fit: 0.565, r: 0.56, ir: 0.00]
#19 - ⬆ - #20 [fit: 0.564, r: 0.56, ir: 0.00]
#4 - ⬇ - #3 [fit: 0.564, r: 0.56, ir: 0.00]
#20 - ⬇ - #19 [fit: 0.564, r: 0.56, ir: 0.00]
#12 - ⬆ - #13 [fit: 0.564, r: 0.56, ir: 0.00]
#19 - ⬇ - #18 [fit: 0.564, r: 0.56, ir: 0.00]
#3 - ⬆ - #4 [fit: 0.563, r: 0.56, ir: 0.00]
#6 - ⬆ - #7 [fit: 0.563, r: 0.56, ir: 0.00]
#18 - ⬇ - #17 [fit: 0.563, r: 0.56, ir: 0.00]
6# - ➡ - 7# [fit: 0.563, r: 0.56, ir: 0.00]
#7 - ⬆ - #8 [fit: 0.563, r: 0.56, ir: 0.00]
7# - ➡ - 8# [fit: 0.563, r: 0.56, ir: 0.00]
3# - ➡ - 4# [fit: 0.563, r: 0.56, ir: 0.00]
#16 - ⬇ - #15 [fit: 0.563, r: 0.56, ir: 0.00]
5# - ➡ - 6# [fit: 0.563, r: 0.56, ir: 0.00]
#17 - ⬇ - #16 [fit: 0.563, r: 0.56, ir: 0.00]
4# - ➡ - 5# [fit: 0.563, r: 0.56, ir: 0.00]
#5 - ⬆ - #6 [fit: 0.563, r: 0.56, ir: 0.00]
#11 - ⬆ - #12 [fit: 0.563, r: 0.56, ir: 0.00]
8# - ➡ - 9# [fit: 0.563, r: 0.56, ir: 0.00]
#15 - ⬇ - #14 [fit: 0.562, r: 0.56, ir: 0.00]
#4 - ⬆ - #5 [fit: 0.562, r: 0.56, ir: 0.00]
#9 - ⬆ - #10 [fit: 0.562, r: 0.56, ir: 0.00]
#10 - ⬆ - #11 [fit: 0.562, r: 0.56, ir: 0.00]
9# - ➡ - 10# [fit: 0.562, r: 0.56, ir: 0.00]
4# - ⬅ - 3# [fit: 0.562, r: 0.56, ir: 0.00]
#8 - ⬆ - #9 [fit: 0.561, r: 0.56, ir: 0.00]
3# - ⬅ - 2# [fit: 0.561, r: 0.56, ir: 0.00]
#14 - ⬇ - #13 [fit: 0.561, r: 0.56, ir: 0.00]
8# - ⬅ - 7# [fit: 0.561, r: 0.56, ir: 0.00]
5# - ⬅ - 4# [fit: 0.561, r: 0.56, ir: 0.00]
7# - ⬅ - 6# [fit: 0.561, r: 0.56, ir: 0.00]
10# - ➡ - 11# [fit: 0.561, r: 0.56, ir: 0.00]
6# - ⬅ - 5# [fit: 0.561, r: 0.56, ir: 0.00]
17# - ➡ - 18# [fit: 0.561, r: 0.56, ir: 0.00]
9# - ⬅ - 8# [fit: 0.561, r: 0.56, ir: 0.00]
11# - ➡ - 12# [fit: 0.561, r: 0.56, ir: 0.00]
2# - ⬅ - 1# [fit: 0.561, r: 0.56, ir: 0.00]
16# - ➡ - 17# [fit: 0.561, r: 0.56, ir: 0.00]
15# - ➡ - 16# [fit: 0.561, r: 0.56, ir: 0.00]
12# - ➡ - 13# [fit: 0.561, r: 0.56, ir: 0.00]
18# - ➡ - 19# [fit: 0.561, r: 0.56, ir: 0.00]
#13 - ⬇ - #12 [fit: 0.561, r: 0.56, ir: 0.00]
13# - ➡ - 14# [fit: 0.560, r: 0.56, ir: 0.00]
14# - ➡ - 15# [fit: 0.560, r: 0.56, ir: 0.00]
#12 - ⬇ - #11 [fit: 0.560, r: 0.56, ir: 0.00]
10# - ⬅ - 9# [fit: 0.560, r: 0.56, ir: 0.00]
#11 - ⬇ - #10 [fit: 0.560, r: 0.56, ir: 0.00]
1# - ➡ - 2# [fit: 0.560, r: 0.56, ir: 0.00]
19# - ➡ - 20# [fit: 0.560, r: 0.56, ir: 0.00]
#8 - ⬇ - #7 [fit: 0.560, r: 0.56, ir: 0.00]
#10 - ⬇ - #9 [fit: 0.560, r: 0.56, ir: 0.00]
11# - ⬅ - 10# [fit: 0.560, r: 0.56, ir: 0.00]
17# - ⬅ - 16# [fit: 0.559, r: 0.56, ir: 0.00]
18# - ⬅ - 17# [fit: 0.559, r: 0.56, ir: 0.00]
12# - ⬅ - 11# [fit: 0.559, r: 0.56, ir: 0.00]
13# - ⬅ - 12# [fit: 0.559, r: 0.56, ir: 0.00]
16# - ⬅ - 15# [fit: 0.559, r: 0.56, ir: 0.00]
20# - ⬅ - 19# [fit: 0.559, r: 0.56, ir: 0.00]
2# - ➡ - 3# [fit: 0.559, r: 0.56, ir: 0.00]
14# - ⬅ - 13# [fit: 0.559, r: 0.56, ir: 0.00]
19# - ⬅ - 18# [fit: 0.559, r: 0.56, ir: 0.00]
15# - ⬅ - 14# [fit: 0.559, r: 0.56, ir: 0.00]
#9 - ⬇ - #8 [fit: 0.559, r: 0.56, ir: 0.00]
#7 - ⬇ - #6 [fit: 0.453, r: 0.45, ir: 0.00]

Manual tests


In [12]:
# Manual check: build the match set for perception ("3", "4") and
# show the matching classifiers from highest to lowest fitness.
state = Perception(("3", "4"))
match = population.form_match_set(state)

for cl in sorted(match, key=lambda c: c.fitness, reverse=True):
    print_cl(cl)


3# - ➡ - 4# [fit: 48.056, r: 48.06, ir: 0.00]
3# - ⬅ - 2# [fit: 46.646, r: 46.65, ir: 0.00]
#4 - ⬇ - #3 [fit: 45.333, r: 45.33, ir: 0.00]
#4 - ⬆ - #5 [fit: 44.968, r: 44.97, ir: 0.00]